# coding:utf-8
from pyspark import SparkConf, SparkContext
from operator import add
if __name__ == '__main__':

    # Initialize the execution environment and build the SparkContext.
    conf = SparkConf().setMaster("local[*]").setAppName("BrowserCount")
    sc = SparkContext(conf=conf)

    # Load the raw CSV data (per-platform hot-city analysis export).
    file_rdd = sc.textFile("../csv文件/各平台热门城市综合分析.csv")

    # Split each CSV line on commas and map every record to (city, 1).
    # NOTE(review): a naive split(",") breaks on quoted fields that contain
    # commas — acceptable only if the source CSV never quotes fields; verify.
    split_rdd = file_rdd.map(lambda x: x.split(","))
    city_rdd = split_rdd.map(lambda x: (x[0], 1))  # (key, value) = (city, 1)

    # Aggregate the count per city and sort by count descending so the most
    # frequent ("hot") cities come first. Cache the result: two actions
    # (take + saveAsTextFile) follow, and without caching Spark would
    # recompute the whole lineage from the text file for each action.
    sorted_rdd = city_rdd.reduceByKey(add).sortBy(lambda x: x[1], ascending=False)
    sorted_rdd.cache()

    # Top 30 hottest cities for local inspection.
    result = sorted_rdd.take(30)

    # Persist the full sorted ranking to HDFS for downstream use.
    sorted_rdd.saveAsTextFile("hdfs://master:9000/user/travel_analyse/result1")
    print(result)

    # Release cluster resources tied to this application.
    sc.stop()